Aminet 15

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 15 / Aminet 15 - Nov 1996.iso / Aminet / util / shell / shtml.lha / shtml.c < prev next >

Wrap

C/C++ Source or Header | 1992-09-02 | 7.6 KB | 265 lines

// CMU96
// .html to .txt converter.
// Supplied 'as is'
// NO WARRANTY INCLUDED!
// If this code doesn't do what you expect, sorry, but it isn't my fault!
// It isn't likely to be damaging but there is a small chance I've overlooked
// something.  It works for me though.
// Compiled with SAS/C compiler.
// I can't guarantee this compiling with anything else but it should be about
// 95% portable across Amiga, PC and Unix systems.

/*
** One day, it might...
**
** remove all unknown tags.
** cat from and to files.
** have command line switches and intelligence.
** do clever stuff with word wrap and line wrap etc.
** have adjustable margins/tab spaces etc.
** recognise <br> <p>PARAGRAPH</p> <a href>INVERT</a> <center>CENTER</center>
**           <h?>PARAGRAPH</h?> <blink>UNDERLINE</blink> <title>TITLE\n</title>
**           <pre>PREFORMATTED</pre>
*/

// But I doubt it...

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Output line width in characters.  Also the size of the word buffer, so a
// single word or tag holds at most width-1 characters plus the terminator.
#define width 77

// Length of the "</pre>" end marker searched for inside preformatted text.
enum { PRE_CLOSE_LEN = 6 };

void getword(char *word,FILE *in);
int preprocess(char *word,FILE *in,FILE *out,int x,int pure);
int tag(char *word);
int tagis(char *word,char *check);

// Print the one-line usage summary.
static void usage(const char *prog)
{
  printf("Usage: %s (-h|-help)|([-p|-pure] infile.html [outfile.txt])\n"
         ,prog);
}

// Characters that separate words in the input.
static int is_separator(int ch)
{
  return (ch==' ') || (ch=='\n') || (ch=='\t') || (ch=='\r');
}

// Return 1 if the PRE_CLOSE_LEN characters at window spell "</pre>" in any
// letter case, 0 otherwise.
static int is_pre_close(const char *window)
{
  static const char closing[]="</pre>";
  int j;

  for (j=0; j<PRE_CLOSE_LEN; j++)
  {
    if (tolower((unsigned char)window[j]) != closing[j])
    {
      return 0;
    }
  }
  return 1;
}

// Program entry point.
//   shtml (-h|-help)
//   shtml [-p|-pure] infile.html [outfile.txt]
// Reads the html file word by word, acts on the tags it knows about, drops
// the rest, and writes the plain words wrapped to `width` columns.
// Returns 0 on success (and after printing usage/help), 1 on an I/O error.
int main(int argc,char *argv[])
{
  FILE *in,*out;
  char word[width];
  int argin=1,argout=2,pure=0;
  int x=0;      // THE X Value (distance from left edge of line in characters)
  int len;

  // Check for correct arguments.  If bad, report usage
  if ((argc<2) || (argc>4) || (strcmp(argv[1],"?")==0))
  {
    usage(argv[0]);
    exit(0);
  }

  // Check for -help flag
  if ((strcmp(argv[1],"-help")==0) || (strcmp(argv[1],"-h")==0))
  {
    printf("\n");
    printf("%s by Chris Underwood. Compiled with SAS/C. CMU",argv[0]);
    puts("\n");
    puts("Converts .html files into a readable form for displaying in a text");
    puts("only shell. An outfile may be specified or the produced text can be");
    puts("redirected or piped.");
    puts("Options:");
    puts(" -h -help\tPrints this text");
    puts(" -p -pure\tSurpress any non-ascii output (inverse text etc)");
    puts(" infile\t\tSource html file (.html)");
    puts(" outfile\tDestination file (.txt)");
    puts("If no outfile is specified then text output will be sent to stdout.");
    puts("If outfile exists it will be overwritten.");
    puts("This program is supplied \'as is\' and has absolutly no warrenty");
    puts("whatsoever. If it (unlikely) screws up your HD, it \'aint my fault!");
    printf("\n");
    exit(0);
  }

  // Check for pure flag, and adjust where to expect *in and *out in the
  // argument list.
  if ((strcmp(argv[1],"-p")==0) || (strcmp(argv[1],"-pure")==0))
  {
    argin=2;    // Set in and out args
    argout=3;
    pure=1;     // Set pure flag
  }

  // "-p" on its own leaves no infile: argv[argin] would be NULL.
  if (argc<=argin)
  {
    usage(argv[0]);
    exit(0);
  }

  // Open infile and check to see if it exists.  Give error if necessary.
  if ((in=fopen(argv[argin],"r"))==NULL)
  {
    fprintf(stderr,"%s: Failed to open file %s for reading\n",
            argv[0],argv[argin]);
    exit(1);
  }

  // Similar check and open outfile.
  if (argc==argout+1)
  {
    if ((out=fopen(argv[argout],"w"))==NULL)
    {
      fprintf(stderr,"%s: Failed to open file %s for writing\n",
              argv[0],argv[argout]);
      fclose(in);
      exit(1);
    }
  }
  else
  {
    // No outfile given: write to stdout.
    out=stdout;
  }

  // Main processing loop
  for (;;)
  {
    getword(word,in);

    if (word[0]=='\0')
    {
      // Empty word: either the input is finished (EOF or read error) or a
      // stray NUL byte was read.  Testing ferror() as well as feof() keeps a
      // failing read from looping forever.
      if (feof(in) || ferror(in))
      {
        break;
      }
      continue;
    }

    if (tag(word))
    {
      x=preprocess(word,in,out,x,pure);
    }
    else
    {
      len=(int)strlen(word);
      if (x+len+1<width)              // If word fits on line...
      {
        x+=len+1;                     // Update the X value
        fprintf(out,"%s ",word);      // And print the word (with space)
      }
      else
      {
        x=len+1;                      // Reset the X value
        fprintf(out,"\n%s ",word);    // And print the word on a new line
      }
    }
  }

  // Cleanup, close files then exit.
  if (!pure)
  {
    // 155 (0x9B) is the 8-bit CSI control character; "0m" resets attributes.
    fprintf(out,"%c0m\n",155);  // Print a newline and set normal text
  }
  else
  {
    fprintf(out,"\n");          // Shouldn't set normal text here - code is impure
  }

  fclose(in);
  if (fclose(out)!=0)           // fclose flushes; a failure means lost output
  {
    fprintf(stderr,"%s: Failed to write output\n",argv[0]);
    exit(1);
  }

  return 0;     // Successful conversion from html to txt!
}

// Read the next word or tag from `in` into `word`.
//   word : buffer of at least `width` chars; always NUL-terminated on return.
//   in   : input stream.
// Leading whitespace is skipped.  If the first character is '<' the whole
// tag, up to the '>' that balances it, is consumed; otherwise characters are
// taken up to the next whitespace, '<' (pushed back for the next call), EOF,
// or until the buffer is full.  `word` is left empty when nothing but
// whitespace remained before EOF.
void getword(char *word,FILE *in)
{
  int len=0;    // Characters stored in word so far
  int depth;    // Number of '<' still waiting for their '>'
  int ch;       // int, not char, so that EOF is distinguishable from data

  memset(word,0,width);

  // Remove leading spaces
  do
  {
    ch=getc(in);
  } while (is_separator(ch));

  if (ch==EOF)
  {
    return;     // Nothing left; word stays empty
  }

  word[len++]=(char)ch;

  if (ch=='<')  // Tag found.
  {
    depth=1;    // Start bracket count
    while ((depth>0) && ((ch=getc(in))!=EOF))
    {
      if (ch=='<') depth++;     // Like a stack, but with no data!
      if (ch=='>') depth--;
      // Only comment tags and bad html have tags this long, so the excess
      // is read and thrown away (original fix suggested by Xav), always
      // leaving room for the terminating NUL.
      if (len<width-1)
      {
        word[len++]=(char)ch;
      }
    }
  }
  else          // Not a tag - a normal word.
  {
    // The length is tested before reading so that no character is lost
    // when an over-long word has to be split.
    while ((len<width-1) && ((ch=getc(in))!=EOF))
    {
      if (is_separator(ch))
      {
        break;
      }
      if (ch=='<')
      {
        ungetc(ch,in);  // Start of a tag: leave it for the next call
        break;
      }
      word[len++]=(char)ch;     // Attach ch to the end of word[]
    }
  }
}

// Act on the tag held in `word`.
//   word : the tag text, starting with '<'.
//   in   : input stream; read further only for <pre>, up to its </pre>.
//   out  : output stream.
//   x    : current column on the output line.
//   pure : non-zero to suppress the inverse-video control sequences.
// Returns the updated column.  Tags that are not recognised produce no
// output.
int preprocess(char *word,FILE *in,FILE *out,int x,int pure)
{
  char window[PRE_CLOSE_LEN];   // Sliding window used to detect </pre>
  int filled;                   // Number of valid characters in window
  int ch;
  int j;

  if (tagis(word,"br") || tagis(word,"h") || tagis(word,"p") ||
      tagis(word,"/h") || tagis(word,"/p"))
  {
    fprintf(out,"\n");  // Print a newline
    x=0;                // Set the X value to the start of the line
  }

  if (!pure)            // If we are not giving pure output...
  {
    if (tagis(word,"title") || tagis(word,"a") || tagis(word,"blink"))
    {
      fprintf(out,"%c7m",155);  // Set inverted text
    }
    if (tagis(word,"/title") || tagis(word,"/a") || tagis(word,"/blink"))
    {
      fprintf(out,"%c0m",155);  // Set normal text...
      fprintf(out,"\b ");       // ...then backspace and write a space
    }
  }

  if (tagis(word,"/title"))     // We need a newline...
  {
    x=0;
    fprintf(out,"\n");
  }

  if (tagis(word,"pre"))
  {
    // Copy everything up to the closing tag verbatim.  A character is only
    // written once it has dropped out of the window, so the closing tag
    // itself is never printed.
    fputc('\n',out);
    filled=0;
    while ((ch=fgetc(in))!=EOF)
    {
      if (filled==PRE_CLOSE_LEN)
      {
        fputc(window[0],out);                   // Print first window char
        memmove(window,window+1,PRE_CLOSE_LEN-1);       // Shuffle left
        filled--;
      }
      window[filled++]=(char)ch;                // Get a new char
      if ((filled==PRE_CLOSE_LEN) && is_pre_close(window))
      {
        filled=0;       // Window only contains </pre>: nothing to dump
        break;
      }
    }
    // EOF without a closing tag: what is still buffered is real text.
    for (j=0; j<filled; j++)
    {
      fputc(window[j],out);
    }
    fputc('\n',out);
    x=0;        // Output continues at the start of a line
  }

  return x;
}

// Return 1 if the tag in `word` begins with the name `check`, else 0.
//   word  : tag text starting with '<'; the name is compared from word[1].
//   check : tag name without the '<', e.g. "br" or "/title".
// The comparison ignores letter case because html tag names do.  It is a
// prefix test only, so "h" matches <h1>..<h6> but also <html>, <head> and
// <hr>, and "p" also matches <pre>.
int tagis(char *word,char *check)
{
  size_t j;

  if (word[0]=='\0')    // Nothing after word[0] to look at
  {
    return 0;
  }

  for (j=0; check[j]!='\0'; j++)        // Walk through check string
  {
    // A short word stops here at its NUL before anything is read past it.
    if (tolower((unsigned char)word[j+1]) != tolower((unsigned char)check[j]))
    {
      return 0;         // Unequal characters: not this tag
    }
  }
  return 1;
}

// Return 1 if `word` is a tag (starts with '<'), 0 for an ordinary word.
int tag(char *word)
{
  if (word[0]=='<')
  {
    return 1;
  }
  return 0;
}